Note:

If you use SpaceFold in published research, please cite:

Niec, R. E., Chu, T., Schernthanner, M., Gur-Cohen, S. et al. Lymphatics act as a signaling hub to regulate intestinal stem cell activity. Cell Stem Cell 2022.

https://doi.org/10.1016/j.stem.2022.05.007

Introduction

In this tutorial we will demonstrate how to use SpaceFold to generate gene expression cartography for mouse small intestine tissue, starting from the BayesPrism output.

Load the SpaceFold package

# Attach SpaceFold, silencing any warnings raised while it is attached.
suppressWarnings(
  library(SpaceFold)
)
#> Loading required package: BayesPrism
#> Loading required package: snowfall
#> Loading required package: snow
#> Loading required package: NMF
#> Loading required package: pkgmaker
#> Loading required package: registry
#> Loading required package: rngtools
#> Loading required package: cluster
#> NMF - BioConductor layer [OK] | Shared memory capabilities [NO: bigmemory] | Cores 71/72
#>   To enable shared memory capabilities, try: install.extras('
#> NMF
#> ')

Load the dataset

The rdata file can be downloaded from "https://figshare.com/ndownloader/files/36113753"

# Restore the objects stored in the downloaded .rdata file into the workspace.
load(file = "SI.sig.bp.rdata")
# List the workspace contents to confirm what was restored.
ls()
#> [1] "SI.sig.bp"
# Auto-print the restored object to show its summary.
SI.sig.bp
#> Input prism info: 
#> Cell states in each cell type: 
#> $Paneth
#> [1] "Paneth"
#> 
#> $TA
#> [1] "TA"
#> 
#> $`bottom zone enterocyte`
#> [1] "bottom zone enterocyte"
#> 
#> $`mid zone enterocyte`
#> [1] "mid zone enterocyte"
#> 
#> $`Lgr5+ progenitor`
#> [1] "Lgr5+ progenitor"
#> 
#> $`goblet cycling`
#> [1] "goblet cycling"
#> 
#> $plasma
#> [1] "plasma"
#> 
#> $`goblet 1`
#> [1] "goblet 1"
#> 
#> $`Lgr5+ stem`
#> [1] "Lgr5+ stem"
#> 
#> $`secretory progenitor`
#> [1] "secretory progenitor"
#> 
#> $`goblet 2`
#> [1] "goblet 2"
#> 
#> $macrophage
#> [1] "macrophage"
#> 
#> $`top zone enterocyte`
#> [1] "top zone enterocyte"
#> 
#> $tuft
#> [1] "tuft"
#> 
#> $`neurons/enteroendocrine`
#> [1] "neurons/enteroendocrine"
#> 
#> $`cycling/GC B cell`
#> [1] "cycling/GC B cell"
#> 
#> $myofibroblast
#> [1] "myofibroblast"
#> 
#> $`stromal 3`
#> [1] "stromal 3"
#> 
#> $`cDC/monocyte`
#> [1] "cDC/monocyte"
#> 
#> $`blood endothelium`
#> [1] "blood endothelium"
#> 
#> $`stromal 1/2`
#> [1] "stromal 1/2"
#> 
#> $lymphatic
#> [1] "lymphatic"
#> 
#> $`T cell`
#> [1] "T cell"
#> 
#> $pDC
#> [1] "pDC"
#> 
#> $glial
#> [1] "glial"
#> 
#> $`resting B cell`
#> [1] "resting B cell"
#> 
#> 
#> Identifier of the malignant cell type:  NA 
#> Number of cell states:  26 
#> Number of cell types:  26 
#> Number of mixtures:  4487 
#> Number of genes:  3908 
#> 
#> Initial cell type fractions: 
#>         Paneth    TA bottom zone enterocyte mid zone enterocyte Lgr5+ progenitor goblet cycling plasma goblet 1 Lgr5+ stem secretory progenitor goblet 2 macrophage top zone enterocyte  tuft neurons/enteroendocrine cycling/GC B cell myofibroblast stromal 3 cDC/monocyte blood endothelium stromal 1/2 lymphatic T cell   pDC glial resting B cell
#> Min.     0.000 0.000                  0.000               0.004            0.000          0.000  0.000    0.000      0.000                0.000    0.000      0.000               0.000 0.000                   0.000             0.000         0.002     0.000        0.000             0.000       0.000     0.000  0.000 0.000 0.000          0.000
#> 1st Qu.  0.005 0.002                  0.004               0.115            0.002          0.006  0.002    0.003      0.002                0.006    0.002      0.003               0.029 0.004                   0.001             0.001         0.019     0.003        0.005             0.003       0.005     0.002  0.002 0.001 0.001          0.001
#> Median   0.012 0.006                  0.010               0.255            0.005          0.014  0.005    0.006      0.004                0.015    0.006      0.004               0.092 0.008                   0.002             0.002         0.044     0.008        0.009             0.005       0.011     0.004  0.004 0.002 0.002          0.002
#> Mean     0.050 0.038                  0.025               0.282            0.018          0.035  0.013    0.009      0.014                0.036    0.022      0.007               0.196 0.014                   0.006             0.003         0.156     0.018        0.013             0.009       0.016     0.007  0.006 0.003 0.003          0.002
#> 3rd Qu.  0.039 0.019                  0.025               0.411            0.016          0.045  0.012    0.011      0.013                0.039    0.031      0.008               0.363 0.018                   0.005             0.004         0.134     0.022        0.017             0.011       0.022     0.009  0.006 0.004 0.004          0.003
#> Max.     0.778 0.741                  0.357               0.859            0.313          0.309  0.352    0.124      0.261                0.349    0.259      0.217               0.794 0.123                   0.222             0.066         0.886     0.324        0.127             0.245       0.135     0.171  0.395 0.033 0.109          0.016
#> Updated cell type fractions: 
#>         Paneth    TA bottom zone enterocyte mid zone enterocyte Lgr5+ progenitor goblet cycling plasma goblet 1 Lgr5+ stem secretory progenitor goblet 2 macrophage top zone enterocyte  tuft neurons/enteroendocrine cycling/GC B cell myofibroblast stromal 3 cDC/monocyte blood endothelium stromal 1/2 lymphatic T cell   pDC glial resting B cell
#> Min.     0.000 0.000                  0.000               0.002            0.000          0.000  0.000    0.000      0.000                0.000    0.000      0.000               0.000 0.000                   0.000             0.000         0.002     0.000        0.000             0.000       0.000     0.000  0.000 0.000 0.000          0.000
#> 1st Qu.  0.004 0.003                  0.006               0.063            0.003          0.005  0.002    0.004      0.003                0.005    0.003      0.002               0.035 0.004                   0.001             0.002         0.017     0.002        0.003             0.003       0.005     0.002  0.002 0.002 0.001          0.002
#> Median   0.011 0.010                  0.016               0.196            0.007          0.011  0.005    0.007      0.007                0.015    0.008      0.004               0.094 0.008                   0.002             0.004         0.039     0.006        0.007             0.006       0.011     0.005  0.003 0.003 0.002          0.003
#> Mean     0.049 0.052                  0.048               0.227            0.021          0.031  0.013    0.012      0.020                0.039    0.022      0.007               0.199 0.014                   0.006             0.005         0.153     0.018        0.011             0.009       0.017     0.008  0.005 0.005 0.005          0.005
#> 3rd Qu.  0.037 0.038                  0.052               0.354            0.019          0.038  0.011    0.014      0.021                0.044    0.030      0.008               0.362 0.017                   0.005             0.006         0.123     0.023        0.014             0.011       0.023     0.010  0.006 0.006 0.005          0.006
#> Max.     0.783 0.798                  0.505               0.748            0.402          0.291  0.351    0.191      0.300                0.390    0.263      0.196               0.800 0.133                   0.242             0.085         0.892     0.333        0.124             0.259       0.139     0.179  0.374 0.054 0.156          0.058

# Read the gene annotation table. Only the first two columns are kept and are
# named "id" and "symbol"; the ids are also used as row names so that rows can
# be looked up by gene id.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned.
gene.tab <- read.table(
  "features.tsv.gz",
  sep = "\t",
  check.names = FALSE,
  header = FALSE
)[, 1:2]
colnames(gene.tab) <- c("id", "symbol")
rownames(gene.tab) <- gene.tab[, "id"]
# Preview the first rows to check that the table was parsed as expected.
head(gene.tab)
#>                                    id  symbol
#> ENSMUSG00000051951 ENSMUSG00000051951    Xkr4
#> ENSMUSG00000089699 ENSMUSG00000089699  Gm1992
#> ENSMUSG00000102331 ENSMUSG00000102331 Gm19938
#> ENSMUSG00000102343 ENSMUSG00000102343 Gm37381
#> ENSMUSG00000025900 ENSMUSG00000025900     Rp1
#> ENSMUSG00000025902 ENSMUSG00000025902   Sox17
# Build the SpaceFold object from the BayesPrism result and the gene table.
sf.obj <- new.sf(bp.obj = SI.sig.bp, feature = gene.tab)

# NOT run: illustrative subsetting only. The two lines are kept commented out
# so that executing the tutorial from top to bottom does not evaluate them.
# Here `label` stands for a column name of sf.obj@meta that indicates the
# variable on which to subset.
# sf.obj.part1 <- sf.obj[sf.obj$label == "part1"]
# sf.obj.part2 <- sf.obj[sf.obj$label == "part2"]

# SI.sig.bp is not referenced again in this tutorial; remove it and run the
# garbage collector so its memory can be released.
rm(SI.sig.bp)
gc()
#>             used   (Mb) gc trigger    (Mb)   max used    (Mb)
#> Ncells   7528739  402.1   12346808   659.4    9597161   512.6
#> Vcells 924879204 7056.3 2420693859 18468.5 2013357003 15360.7
# Compute the background level for each cell type and store the result back
# in sf.obj.
# NOTE(review): in the recorded output none of the fitted theta / Znk cutoffs
# falls below the two user values given here, so they appear to act as lower
# bounds -- confirm against the package documentation.
sf.obj <- compute.background.level(
  sf.obj,
  theta.cutoffs.user = 0.001,
  Znk.cutoffs.user = 20,
  # posterior value above which is assigned to the cluster. Default=0.7.
  posterior.cutoff = 0.7
)
#> fitting mixture models on theta... 
#> Current cell type: Paneth  TA  bottom zone enterocyte  mid zone enterocyte  Lgr5+ progenitor  goblet cycling  plasma  goblet 1  Lgr5+ stem  secretory progenitor  goblet 2  macrophage  top zone enterocyte  tuft  neurons/enteroendocrine  cycling/GC B cell  myofibroblast  stromal 3  cDC/monocyte  blood endothelium  stromal 1/2  lymphatic  T cell  pDC  glial  resting B cell  
#> fitting mixture models on Znk... 
#> Current cell type: Paneth  TA  bottom zone enterocyte  mid zone enterocyte  Lgr5+ progenitor  goblet cycling  plasma  goblet 1  Lgr5+ stem  secretory progenitor  goblet 2  macrophage  top zone enterocyte  tuft  neurons/enteroendocrine  cycling/GC B cell  myofibroblast  stromal 3  cDC/monocyte  blood endothelium  stromal 1/2  lymphatic  T cell  pDC  glial  resting B cell
# Auto-print the object to review its summary after the background-level step.
sf.obj
#> Data info: 
#> Number of spatial spots:  4487 
#> Number of genes:  3908 
#> Fraction of cell types: 
#>         Paneth    TA bottom zone enterocyte mid zone enterocyte Lgr5+ progenitor goblet cycling plasma goblet 1 Lgr5+ stem secretory progenitor goblet 2 macrophage top zone enterocyte  tuft neurons/enteroendocrine cycling/GC B cell myofibroblast stromal 3 cDC/monocyte blood endothelium stromal 1/2 lymphatic T cell   pDC glial resting B cell
#> Min.     0.000 0.000                  0.000               0.002            0.000          0.000  0.000    0.000      0.000                0.000    0.000      0.000               0.000 0.000                   0.000             0.000         0.002     0.000        0.000             0.000       0.000     0.000  0.000 0.000 0.000          0.000
#> 1st Qu.  0.004 0.003                  0.006               0.063            0.003          0.005  0.002    0.004      0.003                0.005    0.003      0.002               0.035 0.004                   0.001             0.002         0.017     0.002        0.003             0.003       0.005     0.002  0.002 0.002 0.001          0.002
#> Median   0.011 0.010                  0.016               0.196            0.007          0.011  0.005    0.007      0.007                0.015    0.008      0.004               0.094 0.008                   0.002             0.004         0.039     0.006        0.007             0.006       0.011     0.005  0.003 0.003 0.002          0.003
#> Mean     0.049 0.052                  0.048               0.227            0.021          0.031  0.013    0.012      0.020                0.039    0.022      0.007               0.199 0.014                   0.006             0.005         0.153     0.018        0.011             0.009       0.017     0.008  0.005 0.005 0.005          0.005
#> 3rd Qu.  0.037 0.038                  0.052               0.354            0.019          0.038  0.011    0.014      0.021                0.044    0.030      0.008               0.362 0.017                   0.005             0.006         0.123     0.023        0.014             0.011       0.023     0.010  0.006 0.006 0.005          0.006
#> Max.     0.783 0.798                  0.505               0.748            0.402          0.291  0.351    0.191      0.300                0.390    0.263      0.196               0.800 0.133                   0.242             0.085         0.892     0.333        0.124             0.259       0.139     0.179  0.374 0.054 0.156          0.058
#> Total reads of each cell type : 
#>            Paneth       TA bottom zone enterocyte mid zone enterocyte Lgr5+ progenitor goblet cycling   plasma goblet 1 Lgr5+ stem secretory progenitor goblet 2 macrophage top zone enterocyte    tuft neurons/enteroendocrine cycling/GC B cell myofibroblast stromal 3 cDC/monocyte blood endothelium stromal 1/2 lymphatic   T cell     pDC    glial resting B cell
#> Min.        0.240    2.108                  2.724               8.752            1.512          2.088    0.108    0.948      1.824                2.968    0.572      1.072               3.296   1.844                   0.596             1.360         5.172     1.352        0.776             1.412       2.040     1.384    1.204   1.148    0.756          1.104
#> 1st Qu.    13.758    6.234                 12.234             280.864            5.822         17.496    5.388    7.664      5.728               23.670    4.334      5.874              75.304  12.250                   2.880             4.310        71.176    10.900       12.628             7.456      14.236     6.342    5.078   3.844    2.184          3.152
#> Median     35.872   13.744                 24.716             666.348           14.156         39.436   13.888   15.236     11.660               44.864   14.468     10.764             208.844  23.004                   5.876             6.328       114.680    20.404       26.152            14.504      30.500    11.420    9.504   5.596    3.500          4.048
#> Mean      174.580  131.702                 82.612            1292.645           42.058        144.589   56.914   37.981     35.478               90.892  129.679     29.684            1006.261  41.857                  21.452             8.208       564.336    58.462       49.585            28.811      55.470    21.732   20.426   7.030   10.770          4.624
#> 3rd Qu.   111.976   45.844                 61.982            1722.730           39.680        128.338   35.904   34.698     27.666               94.958  128.210     29.452            1405.994  46.726                  12.456             9.672       329.224    58.750       56.058            30.856      65.746    22.296   18.836   8.270    6.958          5.356
#> Max.    11395.568 5206.788               2985.000           15032.564          987.392       3426.984 4003.808 1228.896   1303.548             1648.472 2133.264   1295.532           12672.224 787.912                2222.660           325.956      6929.324  2134.680     1760.776           669.100    1362.520   627.464 1949.016 123.648 1072.252         89.288
#> Number of spots above background for each cell type: 
#>                  Paneth                      TA  bottom zone enterocyte     mid zone enterocyte        Lgr5+ progenitor          goblet cycling                  plasma                goblet 1              Lgr5+ stem    secretory progenitor                goblet 2              macrophage     top zone enterocyte                    tuft neurons/enteroendocrine       cycling/GC B cell           myofibroblast               stromal 3            cDC/monocyte       blood endothelium             stromal 1/2               lymphatic                  T cell                     pDC                   glial          resting B cell 
#>                     813                     858                     651                    1718                     690                    1180                     425                     524                     807                     809                    1739                    1009                    1785                     702                     348                     170                     961                    1228                     483                     250                     777                     579                     224                      83                     334                      11 
#> 
#> SpaceFold parameter. 
#> $theta.cutoffs
#>                  Paneth                      TA  bottom zone enterocyte     mid zone enterocyte        Lgr5+ progenitor          goblet cycling                  plasma                goblet 1              Lgr5+ stem    secretory progenitor                goblet 2              macrophage     top zone enterocyte                    tuft neurons/enteroendocrine       cycling/GC B cell           myofibroblast               stromal 3            cDC/monocyte       blood endothelium             stromal 1/2               lymphatic                  T cell                     pDC                   glial          resting B cell 
#>             0.041834734             0.040299262             0.054212311             0.173113661             0.025987854             0.018407931             0.026216806             0.017803111             0.016356786             0.035378190             0.009513852             0.007373489             0.001000000             0.014737803             0.010907857             0.005373429             0.210660124             0.009750703             0.018435523             0.025208389             0.014597346             0.011833999             0.013053149             0.008206102             0.009461187             0.005404264 
#> 
#> $Znk.cutoffs
#>                  Paneth                      TA  bottom zone enterocyte     mid zone enterocyte        Lgr5+ progenitor          goblet cycling                  plasma                goblet 1              Lgr5+ stem    secretory progenitor                goblet 2              macrophage     top zone enterocyte                    tuft neurons/enteroendocrine       cycling/GC B cell           myofibroblast               stromal 3            cDC/monocyte       blood endothelium             stromal 1/2               lymphatic                  T cell                     pDC                   glial          resting B cell 
#>                 140.268                  56.592                  84.392                 938.004                  34.436                  94.764                  55.972                  48.904                  35.004                 107.500                  24.776                  22.648                 376.460                  55.344                  24.596                  20.000                 266.536                  43.212                  75.044                  39.924                  74.748                  28.300                  30.360                  20.000                  20.000                  20.000 
#> 
#> $posterior.cutoff
#> [1] 0.7
# Run the PHATE step on the SpaceFold object.
sf.obj <- run.phate(
  sf.obj,
  # invert the final axis, because its orientation is arbitrary
  if.invert = TRUE,
  # run using 20 CPU threads
  n.jobs = 20
)

# Ordering of the cell types (same as in the paper).
myorder <- c(
  "myofibroblast",
  "glial",
  "stromal 3",
  "stromal 1/2",
  "blood endothelium",
  "lymphatic",
  "Paneth",
  "Lgr5+ stem",
  "Lgr5+ progenitor",
  "TA",
  "bottom zone enterocyte",
  "mid zone enterocyte",
  "top zone enterocyte",
  "secretory progenitor",
  "goblet cycling",
  "goblet 1",
  "goblet 2",
  "tuft",
  "neurons/enteroendocrine",
  "resting B cell",
  "cycling/GC B cell",
  "plasma",
  "T cell",
  "cDC/monocyte",
  "macrophage",
  "pDC"
)

# Draw the beeswarm plot with the cell types arranged as in `myorder`.
plot.beeswarm(
  sf.obj,
  cell.type.order = myorder,
  # set pdf.prefix to your name. Default is output.
  pdf.prefix = "output"
)
#> png 
#>   2

Gene expression cartography


# Define one merged group, "enterocytes", made up of the three enterocyte
# zones.
group.list <- list(
  enterocytes = c(
    "bottom zone enterocyte",
    "mid zone enterocyte",
    "top zone enterocyte"
  )
)

# Merge the listed cell types according to the grouping defined above.
sf.obj <- merge.cell.type(sf.obj = sf.obj, grouping.list = group.list)

# Denoise the cartography. The argument name `tansition` is reproduced exactly
# as in the original call.
# NOTE(review): confirm the spelling against the package signature before
# renaming it.
sf.obj <- denoise.cartography(sf.obj = sf.obj, ka = 5, tansition = 1000)

# Genes plotted for the top-zone example.
top.zone.genes <- c(
  "Apoa4", "Apoc3", "Clca4a", "Krt20",
  "S100a10", "S100a6", "Slc25a22", "Tmsb4x"
)

# Plot raw values without denoising for the merged "enterocytes" group.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned.
plot.cartography(
  sf.obj = sf.obj,
  raw.or.norm = "raw",
  denoise = FALSE,
  selected.genes = top.zone.genes,
  selected.cell.types = "enterocytes",
  show.raw = FALSE,
  overlay.hist = TRUE,
  span = 0.75,
  pdf.prefix = "SI.topzone.v1"
)
#> Warning in brewer.pal(color.num, "Dark2"): minimal value for n is 3, returning requested palette with 3 different levels

# Same genes and cell-type group as the "SI.topzone.v1" call, but with
# denoising switched on and span set to 0.25.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned.
plot.cartography(
  sf.obj = sf.obj,
  raw.or.norm = "raw",
  denoise = TRUE,
  selected.genes = top.zone.genes,
  selected.cell.types = "enterocytes",
  show.raw = FALSE,
  overlay.hist = TRUE,
  span = 0.25,
  pdf.prefix = "SI.topzone.v2"
)
#> Warning in brewer.pal(color.num, "Dark2"): minimal value for n is 3, returning requested palette with 3 different levels

# Genes plotted for the bottom-zone example.
bottom.zone.genes <- c(
  "Atp5a1", "Atp5g3", "Ccl25", "Cox4i1",
  "Cox6c", "Gpx2", "Lgals4", "Plac8"
)

# Plot raw values without denoising for the merged "enterocytes" group.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned.
plot.cartography(
  sf.obj = sf.obj,
  raw.or.norm = "raw",
  denoise = FALSE,
  selected.genes = bottom.zone.genes,
  selected.cell.types = "enterocytes",
  show.raw = FALSE,
  overlay.hist = TRUE,
  span = 0.75,
  pdf.prefix = "SI.bottomzone.v1"
)
#> Warning in brewer.pal(color.num, "Dark2"): minimal value for n is 3, returning requested palette with 3 different levels

# Same genes and cell-type group as the "SI.bottomzone.v1" call, but with
# denoising switched on and span set to 0.25.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned.
plot.cartography(
  sf.obj = sf.obj,
  raw.or.norm = "raw",
  denoise = TRUE,
  selected.genes = bottom.zone.genes,
  selected.cell.types = "enterocytes",
  show.raw = FALSE,
  overlay.hist = TRUE,
  span = 0.25,
  pdf.prefix = "SI.bottomzone.v2"
)
#> Warning in brewer.pal(color.num, "Dark2"): minimal value for n is 3, returning requested palette with 3 different levels
# Genes plotted for the secreted-factor example. `<-` is used for assignment
# instead of `=`.
secreted_factors <- c(
  "Lyve1", "Ntn1", "Il33", "Wnt2", "Ccl21a", "Rspo3", "Reln"
)

# Plot normalized values with denoising for the lymphatic and blood
# endothelium cell types.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned.
plot.cartography(
  sf.obj = sf.obj,
  raw.or.norm = "norm",
  denoise = TRUE,
  selected.genes = secreted_factors,
  selected.cell.types = c("lymphatic", "blood endothelium"),
  show.raw = FALSE,
  overlay.hist = TRUE,
  span = 0.25,
  pdf.prefix = "SI.secreted_factors.v2"
)
#> Warning in brewer.pal(color.num, "Dark2"): minimal value for n is 3, returning requested palette with 3 different levels

# Plot normalized values without denoising for the lymphatic and blood
# endothelium cell types, with bin.by set to "equal.size".
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned.
plot.cartography(
  sf.obj = sf.obj,
  raw.or.norm = "norm",
  denoise = FALSE,
  bin.by = "equal.size",
  selected.genes = secreted_factors,
  selected.cell.types = c("lymphatic", "blood endothelium"),
  show.raw = FALSE,
  overlay.hist = TRUE,
  span = 0.75,
  pdf.prefix = "SI.secreted_factors.v1"
)
#> Warning in brewer.pal(color.num, "Dark2"): minimal value for n is 3, returning requested palette with 3 different levels

# Plot normalized values without denoising for Lyz1, Lgr5 and Sox9 across the
# Paneth, Lgr5+ stem, Lgr5+ progenitor, TA and merged "enterocytes" cell types.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned.
plot.cartography(
  sf.obj = sf.obj,
  raw.or.norm = "norm",
  denoise = FALSE,
  selected.genes = c("Lyz1", "Lgr5", "Sox9"),
  selected.cell.types = c(
    "Paneth", "Lgr5+ stem", "Lgr5+ progenitor", "TA", "enterocytes"
  ),
  show.raw = FALSE,
  overlay.hist = TRUE,
  span = 0.75,
  pdf.prefix = "SI.Lyz1_Lgr5_Sox9.v1"
)

# Plot normalized values with denoising for Lyz1, Lgr5 and Sox9 across the
# Paneth, Lgr5+ stem, Lgr5+ progenitor, TA and merged "enterocytes" cell types.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned.
plot.cartography(
  sf.obj = sf.obj,
  raw.or.norm = "norm",
  denoise = TRUE,
  selected.genes = c("Lyz1", "Lgr5", "Sox9"),
  selected.cell.types = c(
    "Paneth", "Lgr5+ stem", "Lgr5+ progenitor", "TA", "enterocytes"
  ),
  show.raw = FALSE,
  overlay.hist = TRUE,
  span = 0.25,
  pdf.prefix = "SI.Lyz1_Lgr5_Sox9.v2"
)
# Apply a spot selection per cell type based on the SpaceFold axis: values
# below 0.5 for Paneth and Lgr5+ stem, values above 0.5 for top zone
# enterocyte.
# NOTE(review): op = "and" presumably combines this mask with the existing
# selection -- confirm in the package documentation.
sf.obj <- select.spot(
  sf.obj = sf.obj,
  cell.type = "Paneth",
  selected.idx = sf.obj@SpaceFold.axis < 0.5,
  op = "and"
)
sf.obj <- select.spot(
  sf.obj = sf.obj,
  cell.type = "Lgr5+ stem",
  selected.idx = sf.obj@SpaceFold.axis < 0.5,
  op = "and"
)
sf.obj <- select.spot(
  sf.obj = sf.obj,
  cell.type = "top zone enterocyte",
  selected.idx = sf.obj@SpaceFold.axis > 0.5,
  op = "and"
)

# Run the cell-type merge and the denoising step again on the updated object.
# The argument name `tansition` is reproduced exactly as in the original call.
# NOTE(review): confirm the spelling against the package signature before
# renaming it.
sf.obj <- merge.cell.type(
  sf.obj = sf.obj,
  grouping.list = group.list
)

sf.obj <- denoise.cartography(
  sf.obj = sf.obj,
  ka = 5,
  tansition = 1000
)
# Plot normalized values without denoising for Lyz1, Lgr5 and Sox9, using the
# object produced after the spot-selection step.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned.
# NOTE(review): pdf.prefix is the same "SI.Lyz1_Lgr5_Sox9.v1" used by the
# earlier non-denoised Lyz1/Lgr5/Sox9 plot; if output files are named from
# this prefix, the earlier file would be replaced -- confirm this is intended.
plot.cartography(
  sf.obj = sf.obj,
  raw.or.norm = "norm",
  denoise = FALSE,
  selected.genes = c("Lyz1", "Lgr5", "Sox9"),
  selected.cell.types = c(
    "Paneth", "Lgr5+ stem", "Lgr5+ progenitor", "TA", "enterocytes"
  ),
  show.raw = FALSE,
  overlay.hist = TRUE,
  span = 0.75,
  pdf.prefix = "SI.Lyz1_Lgr5_Sox9.v1"
)